package com.dhj;


import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.File;

/**
 * @author DaHuaJia
 * @Description DOM operations: demonstrates jsoup's DOM-style element lookups on a local HTML file.
 * @Date 2021-11-13 13:58:34
 */
public class TestDOM {

    /** HTML file that is parsed when no path is supplied on the command line. */
    private static final String DEFAULT_HTML_PATH =
            "D:\\FreeSpace\\2021\\crawler\\secondCrawler\\src\\main\\resources\\ningmeng.html";

    /**
     * Parses an HTML file and prints the text of the first element found by each lookup style:
     * <ol>
     *   <li>by id: {@code getElementById}</li>
     *   <li>by tag name: {@code getElementsByTag}</li>
     *   <li>by class name: {@code getElementsByClass}</li>
     *   <li>by attribute name: {@code getElementsByAttribute}</li>
     *   <li>by attribute name and value: {@code getElementsByAttributeValue}</li>
     * </ol>
     * A lookup that matches nothing is reported on stderr instead of aborting the run.
     *
     * @param args optional; {@code args[0]} is the path of the HTML file to parse. When absent,
     *             {@link #DEFAULT_HTML_PATH} is used.
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        // Tolerate a null array so programmatic callers that passed null keep working.
        String htmlPath = (args != null && args.length > 0) ? args[0] : DEFAULT_HTML_PATH;
        Document doc = Jsoup.parse(new File(htmlPath), "utf8");

        // Id
        printText("id 'msoType'", doc.getElementById("msoType"));

        // Tag
        printText("tag 'title'", doc.getElementsByTag("title").first());

        // Class
        printText("class 'scSmallBox'", doc.getElementsByClass("scSmallBox").first());

        // Attribute
        printText("attribute 'dhj'", doc.getElementsByAttribute("dhj").first());

        // Attribute + value
        printText("attribute 'dhj'='test'", doc.getElementsByAttributeValue("dhj", "test").first());
    }

    /**
     * Prints the text of {@code element} to stdout, or a notice to stderr when the lookup
     * produced no element.
     *
     * @param lookup  human-readable description of the lookup, used only in the "not found" notice
     * @param element the lookup result; may be {@code null} when nothing matched
     */
    private static void printText(String lookup, Element element) {
        if (element == null) {
            System.err.println("No element found for " + lookup);
            return;
        }
        System.out.println(element.text());
    }

}
